library(tidyverse)
library(wesanderson)
library(MetBrewer)
library(sp)
library(sf)
library(classInt)
library(raster)
library(fixest)
library(osmdata)


dhs <- read_rds("data/dhs_cluster_level_wealth.rds") #ave rwi normalized by survey for all dhs clusters in countries with wealth data

loc <- dhs %>% ungroup() %>%  dplyr::select(cluster_id, lon, lat) %>% distinct()
    
    # read in (publicly available) pm data and process for fig

    # grid <- raster(raster("data/annual_001/V5GL02.HybridPM25.Global.199801-199812.nc"))
    # grid[] <- 1:ncell(grid)
    # 
    # 
    # 
    # loc$cell <- cellFromXY(grid, as.matrix(loc[,c("lon","lat")]))
    # loc$row <- rowFromCell(grid, loc$cell)
    # loc$col <- colFromCell(grid, loc$cell)
    # 
    # 
    # fls <- list.files("1_input/data/vandonk/annual_001/"); fls <- fls[18:22]
    # datmat <- matrix(nrow = nrow(loc), ncol = length(fls))
    # 
    # rdat <- brick(x = grid, nl = length(fls))
    # 
    #       for (i in 1:length(fls)){
    #       
    #         rdat[[i]] <- (raster(paste("1_input/data/vandonk/annual_001/", fls[i], sep = "")))
    #       
    #       }
    #   
    #   yy <- rdat[loc$cell]
    #   
    #   write_rds(yy, file = "data/annual_dhs_vandonk_pm_raw.rds", compress = "gz")
    #   
    
# 
# 
#   output <- data.frame(cluster_id = loc$cluster_id, lon = loc$lon, lat = loc$lat, pm_mean = as.numeric(apply(yy,1,function(x){mean(x, na.rm = T)})))
#   output$pm_median = apply(yy,1,function(x){median(x, na.rm = T)})
#   output$pm_max = apply(yy,1,function(x){max(x, na.rm = T)})
#   output$pm_max[is.infinite(output$pm_max)]<-NA
#   output$pm_mean = round(output$pm_mean, 2)
#   
#   
#   
  #now need to repeat code for anthro pm data
  
  
  # 
  # grid <- raster(raster("1_input/data/vandonk/annual_no_ss/ACAG_PM25_noDUSTnoSEASALT_GWR_V4GL03_201401_201412_0p01.nc"))
  # grid <- flip(flip(t(grid), direction = "x"), direction = "y")
  # grid[] <- 1:ncell(grid)
  # 
  # 
  # 
  # loc$cell <- cellFromXY(grid, as.matrix(loc[,c("lon","lat")]))
  # loc$row <- rowFromCell(grid, loc$cell)
  # loc$col <- colFromCell(grid, loc$cell)
  # 
  # 
  # fls <- list.files("1_input/data/vandonk/annual_no_ss/")[16:20]
  # 
  # 
  # rdat <- raster::brick(x = grid, nl = length(fls))
  # 
  # for (i in 1:length(fls)){
  #   
  #   rdat[[i]] <- flip(flip(t(raster(paste("1_input/data/vandonk/annual_no_ss/", fls[i], sep = ""))), direction = "x"), direction = "y") 
  #   
  # }
  # 
  # 
  # zz <- rdat[loc$cell]
  # 
  # write_rds(zz, file = "data/annual_dhs_vandonk_pm_anthro_raw.rds", compress = "gz")
#   
#   
# #  output <- data.frame(cluster_id = loc$cluster_id, lon = loc$lon, lat = loc$lat, pm_mean = as.numeric(apply(yy,1,function(x){mean(x, na.rm = T)})))
#   output$pm_anthro_mean = as.numeric(apply(zz,1,function(x){mean(x, na.rm = T)}))
#   output$pm_anthro_median = apply(zz,1,function(x){median(x, na.rm = T)})
#   output$pm_anthro_max = apply(zz,1,function(x){max(x, na.rm = T)})
#   output$pm_anthro_max[is.infinite(output$pm_max)]<-NA
#   
#   
  
  
  
  
  
  
  
  
  # write_rds(output, file = "1_input/data/vandonk_rds/annual_dhs_vandonk_pm_clean.rds", compress = "gz")
  
  

  ########################################################################################################################
  
  
  dhs <- read_rds("data/dhs_cluster_level_wealth.rds") %>% drop_na(wealth)
  pm <- read_rds("data/annual_dhs_vandonk_pm_clean.rds")

  dhs <- dhs %>% left_join(pm)
  
  
  #regression
  
        dhs$wealth_n <- dhs$wealth/10000
          mod <- feols(pm_mean ~ wealth_n | svy_id, data = dhs)
          
          cty_list <- sort(unique(dhs$country))
          cty_mod <- data.frame(country = cty_list, est = NA, se = NA)
          
          for(i in 1:length(cty_list)){
            mod1 <- feols(pm_mean ~ wealth | svy_id, data = dhs[dhs$country == cty_list[i],])
            cty_mod[i,"est"] <- coef(mod1)[1]
            cty_mod[i,"se"] <- se(mod1)[1]
            
            
          }
          
          
          
          cty_mod <- cty_mod %>% arrange(est)
          
        
          
          svy_list <- sort(unique(dhs$svy_id))
          dhs$wealth_p <- NA
          
          for(i in 1:length(svy_list)){
            dhs$wealth_p[dhs$svy_id == svy_list[i]] <-  statar::xtile(dhs$wealth[dhs$svy_id == svy_list[i]], n = 100)
                                      }
          
          
          mod <- feols(pm_mean ~ wealth_p | svy_id, data = dhs)
          
          
          dhs <- dhs %>% arrange(svy_id, wealth, wealth_p)
          

          
          ###########
          
          coef1 <- read_csv("data/country_coefficients_collapsed.csv")
          
          world <- read_rds("figdat/world_borders.rds")
          
          #deal with India Pakistan
          row <- coef1 %>% filter(country=="IND_PAK") %>% mutate(country = replace(country, country=="IND_PAK","PAK"))
          coef1 <- coef1 %>% mutate(country = replace(country, country=="IND_PAK","IND"))
          coef1 <- rbind(coef1, row)
          
          sum(coef1$country %in% world$ISO3) #92 countries match
          sum(coef1$country %in% world$ISO3 == F) #11 do not
          coef1$country[coef1$country %in% world$ISO3 ==F] #print countries that don't match (all small islands besides india pakistan)
          
          matchlist <- world@data[,c("NAME","ISO3")]
          matchlist <- matchlist %>% rename(country = NAME)
          
          sum(cty_mod$country %in% matchlist$country==F)
          cty_mod$country[cty_mod$country %in% matchlist$country==F]
          
          matchlist$country[matchlist$country=="Timor-Leste"]<-"Timor"
          matchlist$country[matchlist$country=="Kyrgyzstan"]<-"Kyrgyz Republic"
          
          
          cty_mod <- left_join(cty_mod, matchlist)
          coef1$ISO3 = coef1$country
          
          
          cty_mod <- left_join(cty_mod, coef1[,c("ISO3","mean_beta")])
          cty_mod$est_rescale = cty_mod$est*1e6/5
          
          
          
          pdf(file = "figures/figure_SI8.pdf", width = 8, height = 8)
          
          bg = rep("gray90", nrow(cty_mod))
          bg[cty_mod$est_rescale < 0 & cty_mod$mean_beta > 0 ]<-'gray90'
          bg[cty_mod$est_rescale > 0 & cty_mod$mean_beta < 0 ]<-'gray90'
          
          plot(cty_mod$mean_beta, cty_mod$est_rescale, axes=F, xlab = "",ylab = "", col = NA, ylim = c(-10, 15), xlim = c(-10, 15))
          points(cty_mod$mean_beta, cty_mod$est_rescale, pch = 21, col = 'navy', bg = bg,cex=2)
          axis(1, tick = T)
          axis(2, tick = T, las = 2)
          
          mtext(side  =2 , text = "Using Relative Wealth Measures from DHS",line=2.5, cex=1.5)
          mtext(side  =1 , text = "Using RWI Estimates from Chi et al 2022",line=3.5, cex=1.5)
          
          
          lines(x = -10:15, y = predict(lm( est_rescale ~ mean_beta, cty_mod), newdata = data.frame(mean_beta = -10:15)),lwd=3,col='navy')
          
          
          dev.off()
          
          
          
          
          
          

